%  DEMSPEECH_DUAL  Sigma-Point Kalman Filter based Speech Enhancement Demonstration.
%
%  A single phoneme of speech, corrupted by additive colored noise is enhanced
%  (cleaned up) through Dual SPKF (SRCDKF) based estimation.
%
%  A single speech phoneme sampled at 8kHz is corrupted by additive colored (pink)
%  noise. We use a simple linear autoregressive model (10th order) to model the
%  generative model of the speech signal. We model the pink noise by a known 6th
%  order linear autoregressive process driven by white Gaussian noise with known
%  variance. The SNR of the noisy signal (y=clean+noise) is 0dB.
%
%  The colored noise modeling (augmented state space model) is done according to
%  the method proposed in: "Filtering of Colored Noise for Speech Enhancement and
%  Coding", by J. D. Gibson, B. Koo and S. D. Gray, IEEE Transactions on Signal
%  Processing, Vol. 39, No. 8, August 1991.
%
%  See also : GSSM_SPEECH_LINEAR
%   Copyright (c) Oregon Health & Science University (2006)
%
%   This file is part of the ReBEL Toolkit. The ReBEL Toolkit is available free for
%   academic use only (see included license file) and can be obtained from
%   http://choosh.csee.ogi.edu/rebel/.  Businesses wishing to obtain a copy of the
%   software should contact rebel@csee.ogi.edu for commercial licensing information.
%
%   See LICENSE (which should be part of the main toolkit distribution) for more
%   detail.


%===============================================================================================

clear all; close all; clc;

% This demo needs ARYULE (Yule-Walker AR parameter estimation) from the
% Signal Processing Toolbox. Query EXIST with the 'file' qualifier so that
% a stray workspace variable named 'aryule' cannot satisfy the check.
if ~exist('aryule','file')
  error(' [demspeech_dual] This demonstration requires the Matlab Signal Processing Toolbox to function correctly.');
end

help demspeech_dual

disp(' ');
disp(' ');

disp('Two speech time-series estimates are extracted from the estimated state vectors.');
disp('The first is generated by taking the first component (zero''th lag term) of the');
disp('state vector. The second estimate is generated by using the last (10th lag)');
disp('component of the state vector, which is a fixed-lag smoothed estimate (it uses');
disp('more data).');
disp(' ');
disp('After each iteration (over the whole speech sequence) of the filter, the normalised');
disp('MSE of each estimate is displayed. Three speech sequences are also played over the');
disp('audio device: The first is the noisy sequence, the second is the first estimate and');
disp('the third is the second (full-lag) estimate.');
disp(' ');

dosound = input('Do you want to enable the audio component of this demo (0=no 1=yes) ? ');
if isempty(dosound)                % pressing <Enter> at the prompt returns []; make the
  dosound = 0;                     % "audio disabled" default explicit rather than implicit
end

%--- General setup

addrelpath('../gssm');         % make the example GSSM files visible on the MATLABPATH
addrelpath('../data');         % make the example data files visible on the MATLABPATH


%-- Load clean speech, noise and noisy speech (0dB SNR)

load speech_data;                       % provides the 'clean', 'noise' and 'noisy' signals

B = 0;                                  % offset into the loaded signals
N = 1500;                               % number of samples to keep

frame = (B+1):(B+N);                    % index window selecting the frame of interest
clean = clean(frame);
noisy = noisy(frame);
noise = noise(frame);

%-- Display the clean and noisy speech waveforms

figure(1); clf;
subplot('position',[0.025 0.1 0.95 0.8]);
hNoisy = plot(noisy,'k+');
hold on;
hClean = plot(clean,'b');
xlabel('time');
legend([hNoisy hClean],'noisy','clean');
title('ReBEL Speech Enhancement Demo');
axis tight
drawnow

%-- Initialise GSSM data structure

model = gssm_speech_linear('init');            % initialize the generative model (AR speech + AR colored noise)

%=====================================================================
%  Generate InferenceDS data structures for dual estimation. We need
%  one for the state estimator and one for the parameter estimator.

ftype = 'srcdkf';                                     % we will use square-root central difference Kalman filter (SRCDKF)
                                                      % estimator

paramParamIdxVec = 1:model.speech_taps;               % index vector of the system parameters to be estimated (don't estimate
                                                      % colored noise model parameters)

  %-- Setup state estimator
  Arg.type = 'state';                                   % inference type (state estimation)
  Arg.tag = 'State estimation for GSSM_SPEECH system.'; % arbitrary ID tag
  Arg.model = model;                                    % GSSM data structure of external system

  InfDS_SE = geninfds(Arg);                             % Create inference data structure and
  [pNoise_SE, oNoise_SE, InfDS_SE] = gensysnoiseds(InfDS_SE, ftype);   % generate process and observation
                                                             % noise sources for state estimator

  %-- Setup parameter estimator (Arg is cleared and rebuilt so no state-estimator fields leak through)
  clear Arg;
  Arg.type = 'parameter';                                % inference type (parameter estimation)
  Arg.tag = 'Parameter estimation for GSSM_SPEECH system.'; % arbitrary ID tag
  Arg.paramFunSelect = 'both';                           % We use the full system dynamics as observation, i.e. obs=hfun(ffun(x))
  Arg.paramParamIdxVec = paramParamIdxVec;               % parameters to be estimated index vector (don't estimate colored
                                                         % noise model)
  Arg.model = model;                                     % GSSM data structure of external system

  %-- Explicitly define an observation noise source for the parameter estimator. This is needed for the colored noise
  %   case, since it uses an implicit (within the augmented state) observation noise formulation. When a parameter estimator
  %   is derived from this type of model, one has to override the default (empty/dummy) observation noise source that is
  %   generated.

  Arg.model.Ndim = 1;                                    % We need to override these fields for the parameter estimator

  oNoise_Arg.type = 'gaussian';                          % and actually define a true observation noise source.
  oNoise_Arg.cov_type = 'full';
  oNoise_Arg.dim = 1;
  oNoise_Arg.mu = 0;
  oNoise_Arg.cov  = sqrt(pNoise_SE.cov(2,2));            % NOTE(review): this sets 'cov' to the square ROOT of the SE process
                                                         % noise variance entry -- presumably intentional for the square-root
                                                         % (SRCDKF) formulation, but worth confirming against gennoiseds.

  Arg.model.oNoise = gennoiseds(oNoise_Arg);             % install the overriding observation noise source

  InfDS_PE = geninfds(Arg);

  [pNoise_PE, oNoise_PE, InfDS_PE] = gensysnoiseds(InfDS_PE, ftype);    % generate process and observation
                                                              % noise sources for parameter estimator


%-- ESTIMATE SIGNAL

N = length(noisy);                                    % number of samples in frame

Xh_SE = zeros(InfDS_SE.statedim, N);                  % setup estimation buffers (state estimates per sample)
Xh_PE = zeros(InfDS_PE.statedim, N);                  % parameter (AR coefficient) estimates per sample

init_mod = aryule(noisy, model.speech_taps);          % initial model is fit to noisy speech
init_mod = -1*init_mod(2:end);                        % ARYULE returns [1 a1 .. ap] for x(n)+a1*x(n-1)+...=e(n);
                                                      % drop the leading 1 and negate to get the x(n)=c1*x(n-1)+...
                                                      % sign convention used by the GSSM

Xh_PE(:,1) = init_mod(:);                             % initial model

Sx_SE = eye(InfDS_SE.statedim);                       % initial Cholesky factor of SE estimate covariance
Sx_PE = eye(InfDS_PE.statedim);                       % initial Cholesky factor of PE estimate covariance

InfDS_SE.spkfParams = [sqrt(3)];                      % CDKF scale parameter for SE estimator
InfDS_PE.spkfParams = [sqrt(3)];                      % CDKF scale parameter for PE estimator

number_of_runs = 10;                                  % number of iterations over data

mse = zeros(2,number_of_runs);                        % mean square error buffer

pNoise_PE.cov = 1*eye(InfDS_PE.statedim);             % set initial covariance for PE process noise

pNoise_PE.adaptMethod = 'anneal';                     % setup PE process noise adaptation method
pNoise_PE.adaptParams = [0.995 1e-7];                 % We use the annealing method with an anneal factor of
                                                      % 0.995 and a variance floor of 1e-7

for k=1:number_of_runs,

    fprintf(' [%d:%d] ',k,number_of_runs);

    % For dual estimation we iterate over the data, alternating between a state estimation step and a
    % parameter estimation step

    for j=2:N,

      %--- First, we set the model parameters of the state estimator using the current output of the parameter
      %--- estimator
      InfDS_SE.model = InfDS_SE.model.setparams( InfDS_SE.model, Xh_PE(:,j-1), paramParamIdxVec);

      %--- Now call the state estimator
      [Xh_SE(:,j), Sx_SE] = srcdkf(Xh_SE(:,j-1), Sx_SE, pNoise_SE, oNoise_SE, noisy(:,j), [], [], InfDS_SE);

      %--- And then the parameter estimator. NOTE(review): the PREVIOUS state estimate Xh_SE(:,j-1) is passed
      %--- as the exogenous input here (not the one just computed at j) -- presumably the intended dual-SPKF
      %--- formulation; confirm against the srcdkf interface before changing.
      [Xh_PE(:,j), Sx_PE, pNoise_PE] = srcdkf(Xh_PE(:,j-1), Sx_PE, pNoise_PE, oNoise_PE, noisy(:,j), [], Xh_SE(:,j-1), InfDS_PE);

    end

    % Trim all signals to a common comparable length. estim_1 is the zero-lag estimate (first state
    % component); estim_2 is the fixed-lag smoothed estimate taken from the last speech-lag component,
    % shifted by the lag so it aligns with the clean signal.
    noisy_c = noisy(1:end-model.speech_taps+1);
    clean_c = clean(1:end-model.speech_taps+1);
    estim_1 = Xh_SE(1,1:end-model.speech_taps+1);
    estim_2 = Xh_SE(model.speech_taps,model.speech_taps:end);

    figure(1);clf; subplot('position',[0.025 0.1 0.95 0.8]);
    p1 = plot(noisy_c,'k+'); hold on;
    p2 = plot(clean_c,'b');
    p3 = plot(estim_1,'m');
    p4 = plot(estim_2,'r'); hold off
    xlabel('time');
    legend([p1 p2 p3 p4],'noisy','clean','estimate (0 lag)','estimate (full lag)');
    title('ReBEL Speech Enhancement Demo');
    axis tight

    figure(2);
    plot(Xh_PE'); hold off
    xlabel('k');
    ylabel('parameters');
    title('Estimate of model parameters');

    drawnow

    % Normalised MSE: estimation error power relative to the noisy-signal variance
    mse(1,k) = mean((estim_1-clean_c).^2)/var(noisy_c);
    mse(2,k) = mean((estim_2-clean_c).^2)/var(noisy_c);

    fprintf('  Normalized MSE : 0-lag estimate  = %4.3f   full-lag estimate =  %4.3f\n',mse(1,k),mse(2,k));

    if dosound
      fprintf('   Playing : noisy sample...');
      soundsc(noisy_c,8000,16);
      pause(1);
      fprintf(' 0-lag estimate...');
      soundsc(estim_1,8000,16);
      pause(1);
      fprintf(' full-lag estimate...');
      soundsc(estim_2,8000,16);
      pause(1);
      fprintf(' clean sample.\n');
      soundsc(clean_c,8000,16);
    end

    %-- Reset state estimates and covariance for the next pass over the data
    Xh_SE(:,1) = zeros(InfDS_SE.statedim,1);
    Sx_SE = eye(InfDS_SE.statedim);

    %-- Copy last estimate of model parameters to initial buffer position for next iteration...
    Xh_PE(:,1) = Xh_PE(:,end);

end

%--- House keeping: undo the path changes made during setup

remrelpath('../gssm');       % remove relative search path to example GSSM files from MATLABPATH
remrelpath('../data');       % remove relative search path to example data files from MATLABPATH